import pandas as pd
from matplotlib import pyplot as plt
import numpy as np
from matplotlib import cm
from jax.nn import one_hot
import jax.random as jrState Space models
Station Name: PHOENIX PARK Station Height: 48 M Latitude:53.364 ,Longitude: -6.350
date: - Date and Time (utc) rain: - Precipitation Amount (mm)
temp: - Air Temperature (C)
wetb: - Wet Bulb Temperature (C) dewpt: - Dew Point Temperature (C) vappr: - Vapour Pressure (hPa)
rhum: - Relative Humidity (%) msl: - Mean Sea Level Pressure (hPa) ind: - Indicator
df = pd.read_csv('irish_weather/hly175.csv')
df['date'] = pd.to_datetime(df['date'])
df['year'] = df['date'].dt.year
df.set_index('date', inplace=True)
def convert_float(col):
parsed = []
for i in range(len(df[col])):
try:
parsed.append(float(df.iloc[i][col]))
except Exception as e:
parsed.append(np.nan)
return parsed
df['temp'] = convert_float('temp')
df['rain'] = convert_float('rain')
df['wetb'] = convert_float('wetb')
df.head()/var/folders/__/ng_3_9pn1f11ftyml_qr69vh0000gn/T/ipykernel_79604/2027421057.py:1: DtypeWarning: Columns (2,4,6,7,8,9,10) have mixed types. Specify dtype option on import or set low_memory=False.
df = pd.read_csv('irish_weather/hly175.csv')
| ind | rain | ind.1 | temp | ind.2 | wetb | dewpt | vappr | rhum | msl | year | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| date | |||||||||||
| 2003-08-16 01:00:00 | 0 | 0.0 | 0 | 9.2 | 0 | 8.9 | 8.5 | 11.1 | 95 | 1021.9 | 2003 |
| 2003-08-16 02:00:00 | 0 | 0.0 | 0 | 9.0 | 0 | 8.7 | 8.5 | 11.1 | 96 | 1021.7 | 2003 |
| 2003-08-16 03:00:00 | 0 | 0.0 | 0 | 8.2 | 0 | 8.0 | 7.7 | 10.5 | 96 | 1021.2 | 2003 |
| 2003-08-16 04:00:00 | 0 | 0.0 | 0 | 8.4 | 0 | 8.1 | 7.9 | 10.7 | 97 | 1021.2 | 2003 |
| 2003-08-16 05:00:00 | 0 | 0.0 | 0 | 7.7 | 0 | 7.5 | 7.3 | 10.2 | 97 | 1021.1 | 2003 |
fig, axs = plt.subplots(3, 1, figsize=(20, 20))
axs = axs.flatten()
for y in df['year'].unique():
temp = df[df['year'] == y].reset_index().sort_values('date')
axs[0].plot(temp['temp'], label=f'Year {y}', alpha=0.4)
axs[1].plot(temp['rain'], label=f'Year {y}', alpha=0.4)
axs[2].plot(temp['wetb'], label=f'Year {y}', alpha=0.4)
axs[0].set_title("Annual Air Temperature: C")
axs[1].set_title("Annual Rain Fall")
axs[0].set_xticks([])
axs[1].set_xticks([])
axs[1].legend()<matplotlib.legend.Legend at 0x12c47d6f0>
from jax import vmap
df_2020 = df[df['year'] == 2020]
emissions = df_2020["temp"].to_numpy()
emissions
emissions = np.atleast_2d(emissions).T
emissionsarray([[6.8],
[6.9],
[6.2],
...,
[4.4],
[4.4],
[4.5]])
from dynamax.hidden_markov_model import CategoricalHMM, GaussianHMM
true_num_states = 2
emission_dim = 1
key = jr.PRNGKey(0)
hmm = GaussianHMM(true_num_states, emission_dim)
params, props = hmm.initialize(key=key, method="kmeans", emissions=emissions)/Users/nathanielforde/Documents/Github/NathanielF.github.io/.venv/lib/python3.10/site-packages/sklearn/cluster/_kmeans.py:870: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
warnings.warn(
params, lps = hmm.fit_em(params, props, emissions, num_iters=100)
params
100.00% [100/100 00:00<00:00]
ParamsGaussianHMM(initial=ParamsStandardHMMInitialState(probs=Array([0.916581 , 0.08341918], dtype=float32)), transitions=ParamsStandardHMMTransitions(transition_matrix=Array([[0.9762107 , 0.02378933],
[0.02288962, 0.9771104 ]], dtype=float32)), emissions=ParamsGaussianHMMEmissions(means=Array([[ 6.289909 ],
[14.2797365]], dtype=float32), covs=Array([[[7.8310475]],
[[9.284886 ]]], dtype=float32)))
most_likely_states = hmm.most_likely_states(params, emissions)
most_likely_statesArray([0, 0, 0, ..., 0, 0, 0], dtype=int32)
plt.plot(np.arange(most_likely_states.shape[0]), most_likely_states)fig, axs = plt.subplots(3,1, figsize=(16,10))
colours = cm.rainbow(np.linspace(0, 1, 4))
for i, (ax, colour) in enumerate(zip(axs, colours)):
# Use fancy indexing to plot data in each state.
mask = most_likely_states == i
#ax.plot(hist.reset_index()["Date"].iloc[mask], hist["Close"].iloc[mask], ".-", c=colour)
ax.plot(df_2020.index[mask], df_2020["temp"].to_numpy()[mask], ".-", c=colour)
ax.set_title("{0}th hidden state".format(i))
# Format the ticks.
ax.grid(True)
axs[2].plot(df_2020['temp'])num_timesteps = 365
fig, axs = plt.subplots(2, 1, figsize=(10, 6))
axs = axs.flatten()
for i in range(5):
true_states, emissions_samples = hmm.sample(params, jr.PRNGKey(i), num_timesteps)
axs[0].plot(emissions_samples)
axs[0].set_title("Samples from HMM")
axs[1].set_title("Samples States from HMM")
axs[1].plot(true_states)posterior = hmm.filter(params, emissions)
print(f"marginal likelihood: {posterior.marginal_loglik: .2f}")
print(f"posterior.filtered_probs.shape: {posterior.filtered_probs.shape}")marginal likelihood: -22534.17
posterior.filtered_probs.shape: (8784, 2)
def plot_posterior_probs(probs, states, title=""):
plt.imshow(states[None, :], extent=(0, num_timesteps, 0, 1),
interpolation="none", aspect="auto", cmap="Greys", alpha=0.25)
plt.plot(probs[:, 1]) # probability of the loaded state (z=1)
plt.xlabel("time")
plt.ylabel("p(summer)")
plt.ylim(0, 1)
plt.title(title)
plot_posterior_probs(posterior.filtered_probs, true_states,
title="Filtering Distribution")posterior_smooth = hmm.smoother(params, emissions)
print(f"posterior.smoothed_probs.shape: {posterior_smooth.smoothed_probs.shape}")posterior.smoothed_probs.shape: (8784, 2)
plot_posterior_probs(posterior_smooth.smoothed_probs, true_states,
title="Smoothing Distribution")